FUN With CSV Files


In [2]:
import unicodecsv
import pprint
import csv

In [3]:
csv_file_name="enrollments.csv"

Json Import of csv file

Import data in json formate, All data are imported as string


In [4]:
enrolment = []

f=open(csv_file_name,'rb')

reader =unicodecsv.DictReader(f)
#reader is a itterater so loop is possible only once
print "type(reader) =",reader

#for each_row in reader:
#    enrolment.append(each_row)
enrolment=list(reader) #shorthand for above two line

#close file
f.close()

print "Total no of row : ",len(enrolment),"\n\n"

#print demo data
pprint.pprint(enrolment[1])


type(reader) = <unicodecsv.py2.DictReader instance at 0x03233AD0>
Total no of row :  1640 


{u'account_key': u'448',
 u'cancel_date': u'2014-11-10',
 u'days_to_cancel': u'5',
 u'is_canceled': u'True',
 u'is_udacity': u'True',
 u'join_date': u'2014-11-05',
 u'status': u'canceled'}

Simple import

Import data Row by row


In [5]:
data = []
with open(csv_file_name, 'rb') as csvfile:
    reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
    for row in reader:
        data.append(row)

print "Headings :",(data[0])
print"Data :",(data[1])


Headings : ['account_key,status,join_date,cancel_date,days_to_cancel,is_udacity,is_canceled']
Data : ['448,canceled,2014-11-10,2015-01-14,65,True,True']

Using Pandas ( Very fast than above two)


In [6]:
import pandas as pd

In [7]:
enrol = pd.read_csv(csv_file_name)

In [8]:
enrol[:5]


Out[8]:
account_key status join_date cancel_date days_to_cancel is_udacity is_canceled
0 448 canceled 2014-11-10 2015-01-14 65 True True
1 448 canceled 2014-11-05 2014-11-10 5 True True
2 448 canceled 2015-01-27 2015-01-27 0 True True
3 448 canceled 2014-11-10 2014-11-10 0 True True
4 448 current 2015-03-10 NaN NaN True False

In [9]:
len(enrol["account_key"].unique()) #unique account keys


Out[9]:
1302

In [10]:
type(enrol)


Out[10]:
pandas.core.frame.DataFrame

In [ ]: